package com.example.hadoopdemo.executor.recommend.movie;

import lombok.AllArgsConstructor;
import lombok.Data;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.*;

/**
 * 矩阵计算推荐结果
 * 弊端：
 * （1）两个输入数据集，有严格的读入顺序。由于Hadoop不能指定读入顺序，因此在多节点的Hadoop集群环境，读入顺序有可能会发生错误，造成程序的空指针错误；
 * （2）这个计算过程，在内存中实现。如果矩阵过大，会造成单节点的内存不足。
 *
 * @author Ruison
 * @date 2021/12/8
 */
public class Step4 {
    /**
     * Mapper that multiplies the item co-occurrence matrix by the user preference vectors.
     *
     * <p>Every input line is split with {@code Recommend.DELIMITER} and must hold two fields.
     * Two layouts are recognised by the position of the {@code ':'} separator:
     * <ul>
     *   <li>{@code itemId1:itemId2 <delim> num} &ndash; one co-occurrence cell; it is cached in
     *       the in-memory matrix under {@code itemId1} and nothing is emitted;</li>
     *   <li>{@code itemId <delim> userId:score} &ndash; one user preference; for every cached cell
     *       of row {@code itemId} the pair {@code (userId, "itemId2,score*num")} is emitted.</li>
     * </ul>
     *
     * <p>The matrix cache is {@code static}, so it is shared by every instance of this mapper
     * that lives in the same JVM. A preference record can only be processed after the row of its
     * item has been cached; if the row is missing the task fails with a descriptive
     * {@link IllegalStateException} instead of a bare {@code NullPointerException}.
     */
    public static class PartialMultiply extends Mapper<LongWritable, Text, IntWritable, Text> {
        /** Co-occurrence rows keyed by {@code itemId1}; deliberately static (shared per JVM). */
        private static final Map<Integer, List<CoOccurrence>> coOccurrenceMatrix = new HashMap<>();

        /** Reusable output key (user id); per instance so mapper instances never share it. */
        private final IntWritable outKey = new IntWritable();
        /** Reusable output value ({@code "itemId2,partialScore"}); per instance as well. */
        private final Text outValue = new Text();

        /**
         * Parses one input line and either caches a co-occurrence cell or emits partial products.
         *
         * @param key     input key supplied by the input format; only used in error messages
         * @param value   the raw input line
         * @param context Hadoop context used to write {@code (userId, "itemId2,score")} pairs
         * @throws IOException           if the line is not blank but has fewer than two fields,
         *                               or if writing to the context fails
         * @throws InterruptedException  if writing to the context is interrupted
         * @throws IllegalStateException if a preference record refers to an item whose
         *                               co-occurrence row has not been cached yet
         * @throws NumberFormatException if an id, count or score field is not numeric
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, Text>.Context context) throws IOException, InterruptedException {
            String line = value.toString();
            if (line.trim().isEmpty()) {
                // Blank lines carry no record; skip them instead of failing on values[1].
                return;
            }
            String[] values = Recommend.DELIMITER.split(line);
            if (values.length < 2) {
                throw new IOException("Malformed Step4 input (key " + key
                        + "): expected two delimited fields but got '" + line + "'");
            }
            // The two input data sets have different layouts; the ':' position tells them apart.
            String[] v1 = values[0].split(":");
            String[] v2 = values[1].split(":");
            if (v1.length > 1) {
                cacheCoOccurrence(v1, values[1]);
            }
            if (v2.length > 1) {
                emitPartialProducts(values[0], v2, context);
            }
        }

        /** Stores one {@code itemId1:itemId2 -> num} cell in the shared in-memory matrix. */
        private static void cacheCoOccurrence(String[] itemPair, String countField) {
            int itemId1 = Integer.parseInt(itemPair[0]);
            int itemId2 = Integer.parseInt(itemPair[1]);
            int num = Integer.parseInt(countField);
            coOccurrenceMatrix.computeIfAbsent(itemId1, id -> new ArrayList<>())
                    .add(new CoOccurrence(itemId1, itemId2, num));
        }

        /** Emits {@code (userId, "itemId2,score*num")} for every cached cell of the item's row. */
        private void emitPartialProducts(String itemField, String[] userPreference, Context context) throws IOException, InterruptedException {
            int userId = Integer.parseInt(userPreference[0]);
            int itemId = Integer.parseInt(itemField);
            double preferenceScore = Double.parseDouble(userPreference[1]);

            List<CoOccurrence> row = coOccurrenceMatrix.get(itemId);
            if (row == null) {
                // Fail with a diagnosable message rather than a NullPointerException in the loop.
                throw new IllegalStateException("No co-occurrence row cached for item " + itemId
                        + " while processing the preference of user " + userId
                        + "; the co-occurrence records must be read in this JVM before the preference records");
            }

            outKey.set(userId);
            for (CoOccurrence co : row) {
                outValue.set(co.getItemId2() + "," + preferenceScore * co.getNum());
                context.write(outKey, outValue);
            }
        }
    }

    /**
     * Reducer that performs the matrix addition: it sums the partial products of one key.
     *
     * <p>Each incoming value has the form {@code itemId,partialScore}. Scores that share the same
     * item id are added together, and one {@code itemId,totalScore} record is written per distinct
     * item id under the unchanged input key (the user id emitted by {@code PartialMultiply}).
     */
    public static class AggregateAndRecommend extends Reducer<IntWritable, Text, IntWritable, Text> {
        /** Reusable output value, overwritten before every write. */
        private final static Text v = new Text();

        /**
         * Accumulates the per-item totals for {@code key} and writes them out.
         *
         * @param key     the grouping key, written back unchanged
         * @param values  partial products, each formatted as {@code itemId,partialScore}
         * @param context Hadoop context receiving one record per distinct item id
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void reduce(IntWritable key, Iterable<Text> values, Reducer<IntWritable, Text, IntWritable, Text>.Context context) throws IOException, InterruptedException {
            Map<String, Double> totals = new HashMap<>(16);

            // First pass: fold every partial score into the running total of its item.
            for (Text partial : values) {
                String[] fields = partial.toString().split(",");
                totals.merge(fields[0], Double.parseDouble(fields[1]), Double::sum);
            }

            // Second pass: emit one "itemId,total" record per item.
            for (Map.Entry<String, Double> total : totals.entrySet()) {
                double sum = total.getValue();
                v.set(total.getKey() + "," + sum);
                context.write(key, v);
            }
        }
    }

    /**
     * One cell of the item co-occurrence matrix held in memory by {@code PartialMultiply}.
     *
     * <p>The class declares no members of its own besides the three fields; the all-arguments
     * constructor and the accessors used by the mapper ({@code getItemId2()}, {@code getNum()})
     * come from the Lombok annotations {@code @AllArgsConstructor} and {@code @Data}.
     */
    @Data
    @AllArgsConstructor
    public static class CoOccurrence {
        /** Row item id; {@code PartialMultiply} uses it as the key of its in-memory matrix. */
        private int itemId1;
        /** Column item id; written as the item id of the mapper's output value. */
        private int itemId2;
        /** Integer value of the cell; multiplied by the user's preference score in the mapper. */
        private int num;
    }
}
